* Figure 3: Mobility by Race and Cohort, and Appendix Table A17 (rank-rank and IGE estimates by race)

* Start from an empty workspace and load the analysis file.
clear
use "${data_dir}DavisMazumderData.dta"

* ---- Sample restrictions ----

* (1) Keep observations with age_firstSurvey of 18 or below.
keep if age_firstSurvey <= 18

* (2) Drop observations flagged by either miss0 or miss1
*     (presumably missing parent / child income -- confirm against the data build).
drop if miss0 | miss1

* (3) Parent-link screen. The conditions are named once so the drop rule reads
*     as a sentence; the macros expand to exactly the parenthesised expressions.
local mom_flagged "(mom_outOfRange==1 & linkMom==1)"
local dad_flagged "(dad_outOfRange==1 & linkDad==1)"
local mom_usable  "(mom_outOfRange==0 & linkMom==1)"
local dad_usable  "(dad_outOfRange==0 & linkDad==1)"
local no_link     "(linkMom==0 & linkDad==0)"

* Drop when a linked parent is out of range and the other parent is not a
* usable (linked, in-range) substitute, or when neither parent is linked.
drop if (`mom_flagged' & !`dad_usable') | (`dad_flagged' & !`mom_usable') | `no_link'
 


 * Generate income ranks after applying sample restrictions
* Weighted percentile ranks of family income, computed separately within each
* cohort (surv79) x sex (women) cell, for parents (faminc0 -> rank0) and
* children (faminc1 -> rank1).

* Step 1: cell denominator. N holds r(N) from a weighted tabulate of n in each cell.
* NOTE(review): with [w=weight] tabulate picks its default weight type, so r(N)
* is presumably the weighted cell total -- confirm.
gen N = .
forvalues cohort = 0/1 {
	forvalues female = 0/1 {
		tab n if surv79==`cohort' & women==`female' [w=weight]
		replace N = r(N) if surv79==`cohort' & women==`female'
	}
}

* Step 2: for each generation g (0 = parents, 1 = children):
*   - fix the sort seed so the order of tied incomes is reproducible,
*   - take the running sum of weights within cell, ordered by income,
*   - divide by the cell denominator,
*   - average over observations with the same income so ties share one rank,
*   - rescale to 0-100.
* NOTE(review): the running sums are stored with the default storage type;
* if weights are large this may round -- confirm whether double is wanted.
foreach g in 0 1 {
	set sortseed 6688571
	sort women surv79 faminc`g'
	by women surv79: gen __rank`g' = sum(weight)
	gen _rank`g' = __rank`g'/N
	by women surv79 faminc`g': egen rank`g' = mean(_rank`g')
	replace rank`g' = 100*rank`g'
}

* Cohort-specific parent rank: perc66 is rank0 for surv79==0 rows (0 otherwise),
* perc79 is rank0 for surv79==1 rows (0 otherwise).
gen perc66 = rank0*(1-surv79)
gen perc79 = rank0*surv79

* Remove the intermediate variables.
drop N __rank0 _rank0 __rank1 _rank1


* -------------------------------- *
* Run regressions to make Figure 3 *
* -------------------------------- *
* Each panel comes from one pooled regression in which every regressor is
* interacted with black or (1-black), so slopes and intercepts differ freely by
* race and cohort. The plotted lines leave out the women_* terms, i.e. they
* are the fitted lines for women==0.
* All output paths are quoted so that a results_dir containing spaces works.

* Rank-Rank Figure

* Race-specific copies of the regressors (Black)
gen perc66_b = perc66*black
gen perc79_b = perc79*black
gen surv79_b = surv79*black
gen women_b  = women*black

* Race-specific copies of the regressors (non-Black)
gen perc66_w = perc66*(1-black)
gen perc79_w = perc79*(1-black)
gen surv79_w = surv79*(1-black)
gen women_w  = women*(1-black)

eststo clear
reg rank1 perc66_b perc66_w perc79_w perc79_b black surv79_w surv79_b women_w women_b [w=weight] , cluster(hhid)

* Line order matches the legend: 66 Black, 66 Other, 79 Black, 79 Other.
graph twoway ///
	(function y = _b[_cons] +_b[black]+_b[perc66_b]*x, range(0 100) lcolor(gs10)) ///
	(function y=_b[_cons] +_b[perc66_w]*x, range(0 100) lcolor(black)) ///
	(function y = _b[_cons] + _b[surv79_b] +_b[black]+_b[perc79_b]*x, range(0 100) lcolor(gs10) lpattern(dash)) ///
	(function y=_b[_cons] + _b[surv79_w]+_b[perc79_w]*x, range(0 100) lcolor(black) lpattern(dash)) , ///
	legend(label(1 "66, Black") label(2 "66, Other Race") label(3 "79, Black") label(4 "79, Other Race") position(6) cols(2)) ///
	graphregion(color(white)) xtitle("Parent Percentile") ytitle("Expected Child Rank") ///
	ylabel(,nogrid)
graph save "${results_dir}fig3_rank.gph", replace


* Log-Log Figure

* Make regression adjusted figure
gen loginc66_b = loginc66*black
gen loginc79_b = loginc79*black

gen loginc66_w = loginc66*(1-black)
gen loginc79_w = loginc79*(1-black)

reg  lfaminc1 loginc66_w loginc79_w surv79_w women_w loginc66_b loginc79_b surv79_b women_b black [w=weight], cluster(hhid)

graph twoway ///
	(function y = _b[_cons] +_b[black]+_b[loginc66_b]*x, range(8 14) lcolor(gs10)) ///
	(function y=_b[_cons] +_b[loginc66_w]*x, range(8 14) lcolor(black) ) ///
	(function y = _b[_cons] + _b[surv79_b] +_b[black]+_b[loginc79_b]*x, range(8 14) lcolor(gs10) lpattern(dash)) ///
	(function y=_b[_cons] + _b[surv79_w]+_b[loginc79_w]*x, range(8 14) lcolor(black) lpattern(dash)) , ///
	legend(label(1 "66, Black") label(2 "66, Other Race") label(3 "79, Black") label(4 "79, Other Race")  position(6) cols(2)) ///
	graphregion(color(white)) xtitle("Log Parent Income") ytitle("Expected Log Child Income") ///
	ylabel(,nogrid)
graph save "${results_dir}fig3_ige.gph", replace


* Put the two saved panels side by side and export the final figure.
graph combine "${results_dir}fig3_rank.gph" "${results_dir}fig3_ige.gph", ///
	graphregion(color(white))
graph export "${results_dir}figure3.png", replace




* Appendix Table A17 *
*
* Pattern used for every column of the table:
*   1. cohort-specific slopes regression, clustered on hhid, stored with eststo;
*   2. test of equal slopes across cohorts          -> e(p);
*   3. estimation-sample counts by cohort           -> e(N66), e(N79);
*   4. difference in slopes (79 minus 66) and its SE -> e(diff), e(se);
*   5. an unclustered "...2" regression in which the coefficient on the 79
*      interaction is the cohort difference, stored for later suest comparisons.
* The estadd lines read r() left by the command immediately above them, so the
* statement order must not change.


* Baseline Rank-Rank
regress rank1 perc66 perc79 surv79 women [aweight=weight], vce(cluster hhid)
eststo rank_p

test perc79 = perc66
estadd scalar p = r(p): rank_p
tempvar used
generate byte `used' = e(sample)
count if `used'==1 & surv79==0
estadd scalar N66 = r(N): rank_p
count if `used'==1 & surv79==1
estadd scalar N79 = r(N): rank_p
drop `used'
lincom perc79 - perc66
estadd scalar diff = r(estimate): rank_p
estadd scalar se = r(se): rank_p
global rank_main_est = r(estimate)
global rank_main_se = r(se)

* Same model written with the pooled slope (rank0) plus the 79 interaction.
regress rank1 perc79 rank0 surv79 women [aweight=weight]
estimates store rank_main2

* Baseline IGE
regress lfaminc1 loginc66 loginc79 surv79 women [aweight=weight], vce(cluster hhid)
eststo IGE_p
test loginc79 = loginc66
estadd scalar p = r(p): IGE_p
tempvar used
generate byte `used' = e(sample)
count if `used'==1 & surv79==0
estadd scalar N66 = r(N): IGE_p
count if `used'==1 & surv79==1
estadd scalar N79 = r(N): IGE_p
drop `used'
lincom loginc79 - loginc66
estadd scalar diff = r(estimate): IGE_p
estadd scalar se = r(se): IGE_p
global ige_main_est = r(estimate)
global ige_main_se = r(se)

* Same model written with the pooled slope (lfaminc0) plus the 79 interaction.
regress lfaminc1 lfaminc0 loginc79 surv79 women [aweight=weight]
estimates store IGE_main2

* Control for race - rank-rank
* Adds black as a regressor, then uses suest to test whether the cohort
* difference (coefficient on perc79 in the "...2" form) equals the one stored
* as rank_main2.
regress rank1 perc66 perc79 surv79 women black [aweight=weight], vce(cluster hhid)
eststo rank_control
test perc79 = perc66
estadd scalar p = r(p): rank_control
tempvar used
generate byte `used' = e(sample)
count if `used'==1 & surv79==0
estadd scalar N66 = r(N): rank_control
count if `used'==1 & surv79==1
estadd scalar N79 = r(N): rank_control
drop `used'
lincom perc79 - perc66
estadd scalar diff = r(estimate): rank_control
estadd scalar se = r(se): rank_control
* NOTE(review): these globals carry the "main" name but are overwritten here
* with the race-control estimate -- confirm this is intended.
global rank_main_est = r(estimate)
global rank_main_se = r(se)

regress rank1 perc79 rank0 surv79 women black [aweight=weight]
estimates store rank_control2

suest rank_main2 rank_control2, vce(cluster hhid)
test [rank_main2_mean]perc79=[rank_control2_mean]perc79
estadd scalar p_main = r(p): rank_control

* Control for race - IGE
regress lfaminc1 loginc66 loginc79 surv79 women black [aweight=weight], vce(cluster hhid)
eststo IGE_control
test loginc79 = loginc66
estadd scalar p = r(p): IGE_control
tempvar used
generate byte `used' = e(sample)
count if `used'==1 & surv79==0
estadd scalar N66 = r(N): IGE_control
count if `used'==1 & surv79==1
estadd scalar N79 = r(N): IGE_control
drop `used'
lincom loginc79 - loginc66
estadd scalar diff = r(estimate): IGE_control
estadd scalar se = r(se): IGE_control
* NOTE(review): overwrites the "main" IGE globals with the race-control estimate -- confirm.
global ige_main_est = r(estimate)
global ige_main_se = r(se)

regress lfaminc1 lfaminc0 loginc79 surv79 women black [aweight=weight]
estimates store IGE_control2

suest IGE_main2 IGE_control2, vce(cluster hhid)
test [IGE_main2_mean]loginc79=[IGE_control2_mean]loginc79
estadd scalar p_main = r(p): IGE_control


* Rank-rank, Non-Black Only
* Baseline specification restricted to black==0; suest then tests whether the
* cohort difference equals the one stored as rank_main2.
regress rank1 perc66 perc79 surv79 women [aweight=weight] if black==0, vce(cluster hhid)
eststo rank_w
* Stores the same results under the same name again (redundant with eststo above).
estimates store rank_w
test perc79 = perc66
estadd scalar p = r(p): rank_w
tempvar used
generate byte `used' = e(sample)
count if `used'==1 & surv79==0
estadd scalar N66 = r(N): rank_w
count if `used'==1 & surv79==1
estadd scalar N79 = r(N): rank_w
drop `used'
lincom perc79 - perc66
estadd scalar diff = r(estimate): rank_w
estadd scalar se = r(se): rank_w
* NOTE(review): overwrites the "main" rank globals with the non-Black estimate -- confirm.
global rank_main_est = r(estimate)
global rank_main_se = r(se)

regress rank1 perc79 rank0 surv79 women [aweight=weight] if black==0
estimates store rank_w2
suest rank_main2 rank_w2, vce(cluster hhid)
test [rank_main2_mean]perc79=[rank_w2_mean]perc79
estadd scalar p_main = r(p): rank_w

* IGE, Non-Black Only
regress lfaminc1 loginc66 loginc79 surv79 women [aweight=weight] if black==0, vce(cluster hhid)
eststo IGE_w
test loginc79 = loginc66
estadd scalar p = r(p): IGE_w
tempvar used
generate byte `used' = e(sample)
count if `used'==1 & surv79==0
estadd scalar N66 = r(N): IGE_w
count if `used'==1 & surv79==1
estadd scalar N79 = r(N): IGE_w
drop `used'
lincom loginc79 - loginc66
estadd scalar diff = r(estimate): IGE_w
estadd scalar se = r(se): IGE_w
* NOTE(review): overwrites the "main" IGE globals with the non-Black estimate -- confirm.
global ige_main_est = r(estimate)
global ige_main_se = r(se)

regress lfaminc1 lfaminc0 loginc79 surv79 women [aweight=weight] if black==0
estimates store IGE_w2
suest IGE_main2 IGE_w2, vce(cluster hhid)
test [IGE_main2_mean]loginc79=[IGE_w2_mean]loginc79
estadd scalar p_main = r(p): IGE_w

* Rank-rank, Black
* Baseline specification restricted to black==1; suest then tests whether the
* cohort difference equals the one stored as rank_main2.
regress rank1 perc66 perc79 surv79 women [aweight=weight] if black==1, vce(cluster hhid)
eststo rank_b
* Stores the same results under the same name again (redundant with eststo above).
estimates store rank_b
test perc79 = perc66
estadd scalar p = r(p): rank_b
tempvar used
generate byte `used' = e(sample)
count if `used'==1 & surv79==0
estadd scalar N66 = r(N): rank_b
count if `used'==1 & surv79==1
estadd scalar N79 = r(N): rank_b
drop `used'
lincom perc79 - perc66
estadd scalar diff = r(estimate): rank_b
estadd scalar se = r(se): rank_b
* NOTE(review): overwrites the "main" rank globals with the Black-only estimate -- confirm.
global rank_main_est = r(estimate)
global rank_main_se = r(se)

regress rank1 perc79 rank0 surv79 women [aweight=weight] if black==1
estimates store rank_b2
suest rank_main2 rank_b2, vce(cluster hhid)
test [rank_main2_mean]perc79=[rank_b2_mean]perc79
estadd scalar p_main = r(p): rank_b

* IGE, Black
regress lfaminc1 loginc66 loginc79 surv79 women [aweight=weight] if black==1, vce(cluster hhid)
eststo IGE_b
test loginc79 = loginc66
estadd scalar p = r(p): IGE_b
tempvar used
generate byte `used' = e(sample)
count if `used'==1 & surv79==0
estadd scalar N66 = r(N): IGE_b
count if `used'==1 & surv79==1
estadd scalar N79 = r(N): IGE_b
drop `used'
lincom loginc79 - loginc66
estadd scalar diff = r(estimate): IGE_b
estadd scalar se = r(se): IGE_b
* NOTE(review): overwrites the "main" IGE globals with the Black-only estimate -- confirm.
global ige_main_est = r(estimate)
global ige_main_se = r(se)

regress lfaminc1 lfaminc0 loginc79 surv79 women [aweight=weight] if black==1
estimates store IGE_b2

suest IGE_main2 IGE_b2, vce(cluster hhid)
test [IGE_main2_mean]loginc79=[IGE_b2_mean]loginc79
estadd scalar p_main = r(p): IGE_b



* Write Appendix Table A17: one column per stored model, alternating
* rank-rank and IGE. The IGE regressors are renamed to perc66/perc79 so both
* kinds of model share the same two coefficient rows. Below the coefficients
* the table reports the estadd scalars diff, se, p, N66, N79 and p_main.
estout rank_p IGE_p rank_control IGE_control rank_w  IGE_w rank_b  IGE_b ///
	using  "${results_dir}tableA17.txt", replace ///
	keep(perc66 perc79) ///
	rename(loginc66  perc66 loginc79 perc79) ///
	cells(b(star fmt(3))  se(par fmt(3))) ///
	collabels(,none) stat(diff se p  N66 N79 p_main) stardrop(*)
